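-- Module:Wikidata/Chemin/parser
--
-- Lua module from French Wikipedia ("Wikipédia, l'encyclopédie libre").
-- The wiki page holding this module is under extended semi-protection.
--
-- The page chrome that came with the export (skip-to-content link and the
-- documentation view / edit / history / purge links) is kept here as a comment.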


local tool = require("Module:Utilitaire")
local path = require "Module:Wikidata/Chemin/Path"
local parser = require "Module:FParser"

local pparser = {}

--[[

grammar : 

letter                  ::= "A" | "B" | "C" | "D" | "E" | "F" | "G"
                          | "H" | "I" | "J" | "K" | "L" | "M" | "N"
                          | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
                          | "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
                          | "c" | "d" | "e" | "f" | "g" | "h" | "i"
                          | "j" | "k" | "l" | "m" | "n" | "o" | "p"
                          | "q" | "r" | "s" | "t" | "u" | "v" | "w"
                          | "x" | "y" | "z" ;
digit                   ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
space                   ::= " " ;

Pid                     ::= "P" , digit, { digit } ;
Pname                   ::= letter, { letter | digit | space | "'" } ;

PathFirstLevel          ::= PathFirstAlternative

-- Rules that allow the top level of a path to start from a statement instead of an item; they are variants of PathAlternative and PathSequence.

PathFirstAlternative    ::= PathFirstSequence ( '|' PathFirstSequence )*
PathFirstSequence 
                        ::= ('>' PathQualifier | PathEltOrInverse ) ( '/' PathEltOrInverse | '^' PathElt )*

Path			::= PathAlternative
PathAlternative 	::= 	PathSequence ( '|' PathSequence )*
PathSequence		::= 	PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt			::= 	PathPrimary PathMod?
PathEltOrInverse	::= 	PathElt | '^' PathElt
PathMod			::= 	( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary		::= ( Prop | 'a' | '(' Path ')' 
                            | ( Prop | '!' PathNegatedPropertySet ) '>' PathQualifier
                            | '!' PathNegatedPropertySet )
PathQualifier           ::= ( Prop | '!' PathNegatedPropertySet | PathPropertySet )
                    
Prop                    ::= IRIref | Pid | Pname

rules 95 and 96 in https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#rPathNegatedPropertySet

PathNegatedPropertySet  ::=  	PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
PathOneInPropertySet    ::=  	iri | 'a' | '^' ( iri | 'a' ) 

PathPropertySet         ::=   '(' Path ( '|' Path )+ ')'

For information, SPARQL property path grammar :

https://www.w3.org/TR/sparql11-property-paths/#path-syntax

TriplesSameSubjectPath  ::=   	VarOrTerm PropertyListNotEmptyPath | TriplesNode PropertyListPath
PropertyListPath        ::= 	PropertyListNotEmpty?
PropertyListNotEmptyPath::= 	( VerbPath | VerbSimple ) ObjectList ( ';' ( ( VerbPath | VerbSimple ) ObjectList )? )*
VerbPath 	        ::= 	Path
VerbSimple 	        ::= 	Var
Path 	                ::= 	PathAlternative
PathAlternative         ::= 	PathSequence ( '|' PathSequence )*
PathSequence            ::=	PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
PathElt                 ::= 	PathPrimary PathMod?
PathEltOrInverse        ::= 	PathElt | '^' PathElt
PathMod                 ::= 	( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )
PathPrimary             ::= 	( IRIref | 'a' | '(' Path ')' ) 

--]] 

-- Lexer sub-table of Module:FParser.
local lexer = parser.lexer

-- Local aliases for the Module:FParser members used by the grammar rules below.
local chain = parser.chain
local alternative = parser.alternative
local plus = parser.plus
local idop = parser.idop
local nary_op_parser = parser.nary_op_parser
local lex_char = lexer.lex_char
-- Named parse_epsilon in this file although it aliases the lexer's lex_epsilon.
local parse_epsilon = lexer.lex_epsilon
local lex_integer = lexer.lex_integer

----------------------------------------------------------------------
-- grammar base lexer functions
----------------------------------------------------------------------

-- Lexes a property id ("P" followed by one or more digits) with lexer.lex_regex.
-- On a match the lexer result is tagged with type "Pid" and returned;
-- otherwise nothing is returned.
local function lex_pid(state)
	local token = lexer.lex_regex(state, "P[0-9]+")
	if not token then
		return
	end
	token.type = "Pid"
	return token
end

-- Lexes a prefix made of lowercase letters and underscores with lexer.lex_regex.
-- The pattern uses "*", so it also accepts an empty prefix.
-- On a match the lexer result is tagged with type "prefix" and returned;
-- otherwise nothing is returned.
local function lex_sparql_prefix(state)
	local token = lexer.lex_regex(state, "[a-z_]*")
	if not token then
		return
	end
	token.type = "prefix"
	return token
end

-- Lexes a property label with lexer.lex_regex: a letter followed by any mix
-- of letters, spaces, apostrophes and hyphens.
-- On a match the lexer result is tagged with type "Plabel" and returned;
-- otherwise nothing is returned.
local function lex_property_name(state)
	local token = lexer.lex_regex(state, "[a-zA-Z][a-z A-Z'-]*")
	if not token then
		return
	end
	token.type = "Plabel"
	return token
end

-------------------------------------------------------------------



-- PathElt 	 ::= 	PathPrimary PathMod?
-- PathMod 	 ::= 	( '*' | '?' | '+' | '{' ( Integer ( ',' ( '}' | Integer '}' ) | '}' ) ) )

-- Parses a path element: a PathPrimary followed by an optional modifier.
-- The modifiers tried, in order, are "*", "+", "?", a postfix "^" and the
-- bounded form "{min}" / "{min,max}"; when none applies the primary node is
-- kept unchanged.
-- Returns the parser result with its `node` field set to the node built here,
-- or nothing when parsing fails.
function pparser.pathElt(state)
	local built          -- node attached to the result at the end
	local primary        -- node produced by pparser.pathPrimary
	local lower, upper   -- bounds read from a "{...}" modifier, nil otherwise

	-- Returns a step that, when run, wraps the primary node in `node_class`.
	-- The bounds are read when the step runs, not when it is created.
	local function wrap_with(node_class)
		return idop(function(state)
			built = node_class:create(primary, lower, upper)
		end)
	end

	-- Optional ",max" part of the bounded modifier.
	local upper_bound = alternative{
		chain{
			lex_char(","),
			lex_integer,
			idop(function(state) upper = tonumber(state.lexed) end)
		},
		chain{
			parse_epsilon,
			idop(function(state) upper = nil end)
		}
	}

	-- "{min}" or "{min,max}"; the node is created before the closing brace is lexed.
	local bounded = chain{
		lex_char("{"),
		lex_integer,
		idop(function(state) lower = tonumber(state.lexed) end),
		upper_bound,
		wrap_with(path.BetweenNode),
		lex_char("}"),
	}

	local modifier = alternative{
		chain{ lex_char("*"), wrap_with(path.StarNode) },
		chain{ lex_char("+"), wrap_with(path.PlusNode) },
		chain{ lex_char("?"), wrap_with(path.MaybeNode) },
		chain{ lex_char("^"), wrap_with(path.InverseNode) },
		bounded,
		-- no modifier: the element is the primary itself
		chain{ parse_epsilon, idop(function(state) built = primary end) }
	}

	local outcome = chain{
		pparser.pathPrimary,
		idop(function(state) primary = state.node end),
		modifier
	}(state)

	if not outcome then
		return
	end
	outcome.node = built
	return outcome
end


-- PathEltOrInverse 	 ::= 	PathElt | '^' PathElt
-- Parses either a plain path element or a "^"-prefixed one.
-- For the prefixed form the node of the parsed element is wrapped in an
-- InverseNode, built with `:create` like every other node in this module
-- (the previous code called the class table directly).
-- Returns whatever the underlying alternative returns for `state`.
pparser.pathEltOrInverse = function(state)
	return alternative{
		pparser.pathElt,
		chain{
			lex_char("^"),
			pparser.pathElt,
			-- plain function instead of idop because the state itself is modified
			function(state)
				state.node = path.InverseNode:create(state.node)
				return state
			end
		}
	}(state)
end


--[[ 

Tests :

plop=p.parse("P31",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)
yes
property=>
   P31

plop=p.parse("P31>P279", p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
property=>
   P279
node=>
   P31

plop=p.parse("P31{1,6}",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop)


plop=p.parse("(P31|P17>P31)",p.pathElt) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
nodes=>
   1=>
      property=>
         P31
   2=>
      property=>
         P31
      node=>
         P17

--]]


-- PathSequence ::= PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )*
-- Built with nary_op_parser from: the first operand parser, the parser for
-- one continuation, and a function turning the collected nodes into a
-- SequenceNode.
-- NOTE(review): the exact folding behaviour comes from Module:FParser's
-- nary_op_parser, which is not visible here — confirm there.
pparser.pathSequence = nary_op_parser(
	pparser.pathEltOrInverse,
	alternative{
		chain{
			lexer.lex_char("/"), 
			pparser.pathEltOrInverse,
		},
		chain{
			-- written "^" rather than "\^": the backslash form is an invalid
			-- escape sequence on Lua 5.2+ and yields the same string on Lua 5.1
			lexer.lex_char("^"), 
			pparser.pathElt,
			-- the element following '^' is traversed in the inverse direction
			function(state) 
				state.node = path.InverseNode:create(state.node) 
				return state
			end
		}
	},
	function(acc) return path.SequenceNode:create(acc) end
)


--[[
Tests:

plop=p.parse("P31/P31+",p.pathSequence) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
nodes=>
   1=>
      property=>
         P31
   2=>
      node=>
         property=>
            P31
--]]


-- PathAlternative 	 ::= 	PathSequence ( '|' PathSequence )*

-- Alternative of path sequences separated by '|'. The collected nodes are
-- handed to path.AlternativeNode:create.
do
	-- one continuation: the separator followed by another sequence
	local bar_then_sequence = chain{
		lex_char("[|]"),
		pparser.pathSequence
	}

	local function to_alternative(acc)
		return path.AlternativeNode:create(acc)
	end

	pparser.pathAlternative = nary_op_parser(
		pparser.pathSequence,
		bar_then_sequence,
		to_alternative
	)
end
	
--[[
plop=p.parse("P31|P17/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
nodes=>
   1=>
      property=>
         P31
   2=>
      nodes=>
         1=>
            property=>
               P17
         2=>
            node=>
               property=>
                  P279
                  
plop=p.parse("P31|P17>P31/P279+",p.pathAlternative) ; t = require "Module:Tools" ; t.dump_to_console(plop) 
yes
nodes=>
   1=>
      property=>
         P31
   2=>
      nodes=>
         1=>
            property=>
               P31
            node=>
               P17
         2=>
            node=>
               property=>
                  P279

--]]


-- PathSequence 	 ::= 	PathEltOrInverse ( '/' PathEltOrInverse | '^' PathElt )* 


-- Builds the node tree for the path P31/P279*: a sequence made of the
-- property P31 followed by zero or more P279 steps.
local function instance()
	local first_step = path.PropertyNode:create("P31")
	local repeated_step = path.StarNode:create(path.PropertyNode:create("P279"))
	return path.SequenceNode:create({ first_step, repeated_step })
end

-- PathPrimary 	 ::= ( Prop | '!'  NegatedPropertySet ) ( '>' ( Prop | '!'  NegatedPropertySet ) ) ? | 'a' | '(' Path ')' 

-- Parses a primary path expression. The alternatives are tried in this order:
--   1. 'a' followed by a space, which yields the tree built by instance();
--   2. a property or a '!' negated property set, optionally followed by a
--      qualifier (pparser.pathQualifier), in which case both nodes are
--      combined into a QualifiedStatementNode;
--   3. a parenthesised path;
--   4. a '!' negated property set on its own.
-- NOTE(review): alternative 4 starts with the same input as the second branch
-- of alternative 2, which also accepts a missing qualifier — it looks
-- redundant; confirm before removing.
-- Returns the parser result with its `node` field set to the node built here,
-- or nothing when parsing fails.
function pparser.pathPrimary(state)
	local built

	-- copies the node of the current parser state into `built`
	local function keep_state_node(state)
		built = state.node
	end

	local instance_shorthand = chain{
		lex_char('a'),
		lex_char(' '),
		idop(function(state) built = instance() end)
	}

	local statement_head = chain{
		alternative{
			pparser.prop,
			chain{ lex_char('!'), pparser.negatedPropertySet }
		},
		idop(keep_state_node)
	}

	local optional_qualifier = alternative{
		chain{
			pparser.pathQualifier,
			-- `built` still holds the statement head at this point
			idop(function(state)
				built = path.QualifiedStatementNode:create(built, state.node)
			end)
		},
		parse_epsilon
	}

	local statement = chain{ statement_head, optional_qualifier }

	local parenthesised = chain{
		lexer.open_parenthesis,
		pparser.path,
		idop(keep_state_node),
		lexer.close_parenthesis
	}

	local negated_set = chain{
		lexer.lex_char('!'),
		pparser.negatedPropertySet,
		idop(keep_state_node)
	}

	local outcome = alternative{
		instance_shorthand,
		statement,
		parenthesised,
		negated_set
	}(state)

	if not outcome then
		return
	end
	outcome.node = built
	return outcome
end

--[[
Tests :

p.parse("a ", p.pathPrimary) => yes
p.parse("!P31", p.pathPrimary) => yes
p.parse("!(P31|instance of)", p.pathPrimary) => yes

--]]

-- Temporary helper, meant to be removed eventually.
-- Returns a parser that reads a single property with pparser.prop and then
-- replaces the state's node by `wrapper` applied to a one-element list
-- holding that property.
local function parsePropAndWrap(wrapper)
	local function wrap_single_property(state)
		-- TODO: understand why the property has to sit under a `node` key
		-- instead of being the list element itself
		local members = { { node = state.node } }
		state.node = wrapper(members)
		return state
	end

	return chain{ pparser.prop, wrap_single_property }
end
		
-- Returns a parser for a parenthesised property set:
--   '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'
-- `final_node_creator` receives the list of member nodes (possibly empty) and
-- returns the node representing the whole set.
function pparser.pathPropertySetParser(final_node_creator)
	-- Passed to nary_op_parser as its fourth argument: wraps one member node
	-- in a one-element list before building the final node.
	local function from_single_member(node)
		return final_node_creator({ node })
	end

	-- Allows the empty set "()", to mimic "any qualifier allowed"
	-- (equivalent of «*»).
	local function to_empty_set(state)
		state.node = final_node_creator({})
		return state
	end

	return function(state)
		-- built on every call so that pparser.pathOneInPropertySet is looked
		-- up when parsing runs, not when this factory is called
		local members = nary_op_parser(
			pparser.pathOneInPropertySet,
			chain{
				lexer.lex_char("|"),
				pparser.pathOneInPropertySet
			},
			final_node_creator,
			from_single_member
		)

		-- A parsePropAndWrap(final_node_creator) alternative was once planned
		-- here for patterns like "!(P31)" (a negated set with one property);
		-- nary_op_parser or something else needs fixing to handle that better.
		return chain{
			lexer.open_parenthesis,
			alternative{
				members,
				chain{ parse_epsilon, to_empty_set }
			},
			lexer.close_parenthesis
		}(state)
	end
end

-- Returns a parser accepting either a single property or a parenthesised
-- property set. In both cases `creator` is called with a list of nodes to
-- build the resulting node.
function pparser.propOrSetParser(creator)
	local function build_set(nodes)
		return creator(nodes)
	end

	return function(state)
		-- a lone property (e.g. the P31 of "!P31") still has to end up in a
		-- set, so it is wrapped with the same creator
		local single_property = parsePropAndWrap(creator)
		local parenthesised_set = pparser.pathPropertySetParser(build_set)
		return alternative{ single_property, parenthesised_set }(state)
	end
end

-- '>' ( Prop | '!'  NegatedPropertySet | PropertySet )
-- Parses a qualifier: '>' followed by either a '!'-negated property or set,
-- or a plain property or set. The resulting set node is wrapped in a
-- QualifierSnakNode.
do
	local function negated_set(nodes)
		return path.NegatedPropertySetNode:create(nodes)
	end

	local function plain_set(nodes)
		return path.PropertySetNode:create(nodes)
	end

	local function to_qualifier_snak(state)
		state.node = path.QualifierSnakNode:create(state.node)
		return state
	end

	pparser.pathQualifier = chain{
		lex_char(">"),
		alternative{
			chain{ lex_char("!"), pparser.propOrSetParser(negated_set) },
			pparser.propOrSetParser(plain_set)
		},
		to_qualifier_snak
	}
end
--[[
=p.parse(">!(P31|P31)",p.pathQualifier)
=p.parse(">(P31|P31)",p.pathQualifier)
=p.parse(">P31",p.pathQualifier)
=p.parse(">!P31",p.pathQualifier)
--]]

-- PathNegatedPropertySet	  ::=  	PathOneInPropertySet | '(' ( PathOneInPropertySet ( '|' PathOneInPropertySet )* )? ')'

-- Parser for a parenthesised property set whose members are turned into a
-- NegatedPropertySetNode. The leading '!' is consumed by the callers.
pparser.negatedPropertySet = pparser.pathPropertySetParser(function(members)
	return path.NegatedPropertySetNode:create(members)
end)


--[[
Tests :

p.parse("!P31",p.negatedPropertySet)
p.parse("(P31|P32)",p.negatedPropertySet) => yes
p.parse("P31",p.negatedPropertySet) => yes
p.parse("^P31",p.negatedPropertySet) => yes
p.parse("^(P31)",p.negatedPropertySet) => nope
p.parse("(P31)",p.negatedPropertySet) => yes
p.parse("(^P31)",p.negatedPropertySet) => yes
p.parse("(^P31|a|plop)",p.negatedPropertySet) => yes

All good(?)

--]]

-- PathOneInPropertySet	  ::=  	iri | 'a' | '^' ( iri | 'a' ) 

-- Parses one member of a property set: 'a' or a property, optionally
-- prefixed with '^'.
-- Returns the parser result with its `node` field set to the member's node,
-- or the falsy result of the underlying parser when nothing matches.
pparser.pathOneInPropertySet = function(state)
	local node = {}
	-- Node of the member matched by pElement. Declared local: it used to be
	-- written as a global and was never read back.
	local elem

	local pElement = alternative{
		chain{
			lexer.lex_char('a'),
			idop(function(state) elem = instance() end)
		},
		chain{
			pparser.prop,
			idop(function(state) elem = state.node end)
		}
	}

	-- The member node is taken from `elem` rather than from state.node, so
	-- that the tree built for 'a' is actually used (for a property both hold
	-- the same node).
	-- NOTE(review): the '^'-prefixed member is stored as is, while the
	-- unprefixed one is wrapped in an InverseNode — the opposite of what the
	-- PathOneInPropertySet grammar suggests. Left unchanged because the
	-- consumers of the set nodes are not visible here; confirm before swapping.
	local res = alternative{
		chain{
			lexer.lex_char("^"),
			pElement,
			idop(function(state) node = elem end)
		},
		chain{
			pElement,
			idop(function(state) node = path.InverseNode:create(elem) end)
		}
	}(state)

	if res then res.node = node end
	return res
end

			

-- Prop ::= IRIref | Pid | Pname
-- Parses a property reference: either a property id with an optional
-- "prefix:" in front of it, or a property label.
-- Returns the parser result with its `node` field set to a PropertyNode
-- created from the result's `lexed` field, or nothing when parsing fails.
function pparser.prop(state)
	local optional_prefix = parser.questionmark(
		chain{
			lex_sparql_prefix,
			lex_char(":")
		}
	)
	local prefixed_pid = chain{ optional_prefix, lex_pid }

	local matched = alternative{ prefixed_pid, lex_property_name }(state)
	if not matched then
		return
	end

	matched.node = path.PropertyNode:create(matched.lexed)
	return matched
end
--[[

Tests :

p.parse("a ", p.primary) => yes
p.parse("P31@", p.prop) => nope
p.parse("P31", p.prop) => nope
p.parse("P31>P279", p.prop) => nope

--]]


-- PathFirstSequence ::= PathQualifier ( '/' PathEltOrInverse )*
-- pparser.pathQualifier consumes the leading '>' itself and already wraps its
-- result in a QualifierSnakNode, so no extra wrapping happens here.
-- NOTE(review): the grammar comment at the top of the file also lists a
-- PathEltOrInverse head and a '^' PathElt continuation for this rule; only
-- the form above is implemented here.
do
	-- one continuation: '/' followed by a path element
	local slash_step = chain{
		lex_char("/"),
		pparser.pathEltOrInverse
	}

	local function to_sequence(acc)
		return path.SequenceNode:create(acc)
	end

	pparser.pathFirstSequence = nary_op_parser(
		pparser.pathQualifier,
		slash_step,
		to_sequence
	)
end

-- Path ::= PathAlternative
-- Thin wrapper that looks up pparser.pathAlternative each time it is called
-- and applies it to `state`.
function pparser.path(state)
	local parse_alternative = pparser.pathAlternative
	return parse_alternative(state)
end

-- PathFirstAlternative ::= Path | PathFirstSequence ( '|' PathFirstSequence )*
-- Top-level rule: a regular path is tried first, then '|'-separated
-- sequences that start from a statement qualifier.
do
	local function to_alternative(acc)
		return path.AlternativeNode:create(acc)
	end

	local statement_rooted = nary_op_parser(
		pparser.pathFirstSequence,
		chain{
			lex_char("|"),
			pparser.pathFirstSequence
		},
		to_alternative
	)

	pparser.pathFirstAlternative = alternative{
		pparser.path,
		statement_rooted
	}
end



-- plop = p.parse_path("P31/P31/P31>P31/P31") 

-- Parses `property_path` with the top-level rule pparser.pathFirstAlternative.
-- Returns the parse result; raises an error naming the path when the parser
-- returns a falsy value.
function pparser.parse_path(property_path)
	local parsed = parser.parse(property_path, pparser.pathFirstAlternative)
	local failure_message = "parsing returned a nil obj on path : «" .. property_path .. "»"
	assert(parsed, failure_message)
	return parsed
end

-- Re-exports Module:FParser's parse function so that individual rules can be
-- tried from the debug console, e.g. p.parse("P31", p.pathElt).
pparser.parse = parser.parse

-- The module's public table: the grammar rules plus parse_path and parse.
return pparser